// 6_figures.do

clear all
capture log close	
cd "projectfolder"
use "analysis_data_organized.dta", clear


qui reg EA_new firstborn ea_new_ukb_ld ea_new_23me_ukb_ld rank family_size last_child sex
keep if e(sample)

egen N_famid_new=count(famid) if famid!=., by(famid)
tab N_famid_new
drop if N_famid_new==1
count

global pgs_new ea_new_ukb_ld ea_new_ukb_ld_0 ea_new_ukb_ld_1 ea_new_23me_ukb_ld ea_23me_ld
foreach i in $pgs_new {
	qui sum `i', detail
	replace `i' = (`i' - r(mean))/r(sd)
}

********************************************************************
* FIGURE 1. THE STANDARDIZED POLYGENIC SCORE AND YEARS OF EDUCATION
********************************************************************
xtile       PGS = ea_new_ukb_ld, nquantiles(200)
bys         PGS: egen meanP = mean(ea_new_ukb_ld)
bys         PGS: egen meanD = mean(EA_new)
egen        tag = tag(meanP meanD)
twoway  (scatter meanD ea_new_ukb_ld if tag==1) ///
                (lpoly EA_new ea_new_ukb_ld) ///
                (kdensity ea_new_ukb_ld, yaxis(2)) ///
                if ea_new_ukb_ld>-4 & ea_new_ukb_ld<4 , ///
                legend(off) ///
                xtitle("PGS for Years of Education") ytitle("Years of Education" " ") ytitle("Density", axis(2)) scheme(s1mono) ///
				title("Figure 1. The standardized polygenic score and years of education.", placement(center)) note("Notes: The scatter plot is based on 200 bins of the polygenic score distribution. The dots show the average years of education for each bin.", position(12))
*graph save EAPGS_EA_new.gph, replace
graph export "Figure_1.pdf", as(pdf) name("Graph")

drop      PGS meanP meanD tag

pwcorr EA_new ea_new_ukb_ld, sig

***********************************************
* FIGURE 2. BIRTH ORDER AND YEARS OF EDUCATION 
***********************************************
// Residualize EA_new w.r.t. controls 
global controls sex i.YoB i.MoB e_PC* family_size_imp
reg EA_new $controls, robust
predict EA_res, residuals


// Labels
label def later 0 "Firstborn" 1 "Laterborn"
label val laterborn later

* PANEL A
qui ciplot EA_res if famid!=., by (rank) ytitle("Residualized years of education") xtitle("Birth Order") title("Panel A") scheme(s1mono)
graph save rank5_sibling_eares.gph, replace 

* PANEL B
qui ciplot EA_res if famid!=., by (laterborn) ytitle(" ") xtitle("") title("Panel B") scheme(s1mono)
graph save laterborn_siblings_eares.gph, replace

graph combine rank5_sibling_eares.gph laterborn_siblings_eares.gph,  scheme(s1mono) title("Figure 2. Birth order and years of education.", placement(center)) note("Notes: 95% confidence intervals. Years of education is residualized with respect to year and month of birth, family size, gender, and the first 40 PCs of the genetic related matrix.", position(12))

graph export "Figure_2.pdf", as(pdf) name("Graph")

*****************************************************************************
* FIGURE 3. THE RELATIONSHIP BETWEEN BIRTH ORDER AND THE POLYGENIC SCORE FOR 
*           YEARS OF EDUCATION IN THE ANALYSIS SAMPLE  
*****************************************************************************
// MAYBE SHORTEN THIS TITLE, INCONSISTENT WITH SHORT ONES BEFORE 

// Residulize PGS w.r.t. controls
global controls sex i.YoB i.MoB e_PC* family_size_imp
reg  ea_new_ukb_ld $controls, robust
predict ea_new_ukb_ld_res, residuals

* PANEL A
qui ciplot ea_new_ukb_ld_res, by (rank) ytitle("Residualized" "PGS for years of education") xtitle("Birth Rank") title("Panel A") scheme(s1mono) yline(0)
graph save ea_new_ukb_ld_res.gph, replace 

* PANEL B
qui ciplot ea_new_ukb_ld_res, by (laterborn) ytitle(" ") xtitle("") title("Panel B") scheme(s1mono) yline(0)
graph save ea_score&firstborn_eanew_res.gph, replace 

* PANEL C
qui twoway (kdensity ea_new_ukb_ld_res if rank==1) ///
           (kdensity ea_new_ukb_ld_res if rank==2) ///
	       (kdensity ea_new_ukb_ld_res if rank==3) ///
           (kdensity ea_new_ukb_ld_res if rank==4) ///
	       (kdensity ea_new_ukb_ld_res if rank==5), ///
	       ytitle("Kernel density") ///
		   xtitle("Residualized PGS for years of education") ///
		   title("Panel C") ///
	       legend(label(1 "1{superscript:st}") label(2 "2{superscript:nd}") label(3 "3{superscript:rd}") label(4 "4{superscript:th}") label(5 "5{superscript:th}") row(1)) ///
		   scheme(s1mono)
graph save Rank_PGS_eanew_res.gph, replace 

* PANEL D
qui twoway (kdensity ea_new_ukb_ld_res if firstborn==1) ///
           (kdensity ea_new_ukb_ld_res if firstborn==0), ///
	       ytitle(" ") ///
		   xtitle("Residualized PGS for years of education") ///
		   title("Panel D") ///
	       legend(label(1 "Firstborn") label(2 "Laterborn")) ///
		   scheme(s1mono)
graph save Firstborn_PGS_eanew_res.gph, replace 

graph combine ea_new_ukb_ld_res.gph ea_score&firstborn_eanew_res.gph Rank_PGS_eanew_res.gph Firstborn_PGS_eanew_res.gph, scheme(s1mono) title("Figure 3. The relationship between birth order and the polygenic score for years of education" "in the analysis sample.", placement(center)) note("Notes: 95% confidence intervals." "The polygenic score (PGS) for years of education is residualized with respect to" "year and month of birth, family size, gender, and 40 first PCs of the genetic related matrix.", position(12))
graph export "Figure_3.pdf", as(pdf) name("Graph")

*** end of do-file